# ===============================================================================================================================
# ===============================================================================================================================
# Set up all of the packages, pathways, and functions I need.
# ===============================================================================================================================
# ===============================================================================================================================

# require(devtools)
# install_version("fixest", version = "0.8.0")
sessionInfo()

# So, it is kind of like a universal "&" sign in excel that combines strings without and spaces between them
# this is a shorthand to avoid the paste function
`%c%` <- function(...){
  paste0(...,colapse='')
}
# ===============================================================================================================================
# ===============================================================================================================================
# Import some analysis and data handling packages.
# ===============================================================================================================================
# ===============================================================================================================================
library(haven)
library(summarytools)
library(pryr)
library(rlang)
library(tidyverse)
# library(sjlabelled)
library(openxlsx)
library(DescTools)
library(labelled)
library(tictoc)
library(arrow)
library(fixest)
library(ggplot2)
library(ggridges)
#ggplot2 wrapper for fixest's iplot function (see https://github.com/lrberge/fixest/issues/179?_pjax=%23repo-content-pjax-container)
# library(ggiplot) # remotes::install_github('grantmcdermott/ggiplot') #I made my own wrapper!
library(plotly)
library(htmlwidgets)
library(ggsci)  #color palettes for journals
library(RColorBrewer)

library(readxl)
library(microbenchmark)
library(qs)
library(utils)
library(comprehenr)
library(skimr)
library(magrittr)

#exports to excel EG05272022
if(!require('writexl')) {
  install.packages('writexl')
  library('writexl')
}

# ===============================================================================================================================
# Set up all of the paths desired
# ===============================================================================================================================

cdd = list(
  'p' = cd,
  'p_d' = cd %c% '/Data',
  'p_d_sg' = cd %c% '/Data/Safegraph',
  'p_d_sg_pa' = cd %c% '/Data/Safegraph/PlasmaAnalysis',
  'p_d_sg_pb' = cd %c% '/Data/Safegraph/PaydayBans',
  'p_d_sg_va' = cd %c% '/Data/Safegraph/VisitorAnalysis',
  'p_d_sg_pva' = cd %c% '/Data/Safegraph/PlasmaVisitorAnalysis',
  'p_d_sg_1s' = cd %c% '/Data/Safegraph/1stStageAnalysis',
  'p_d_cl' = cd %c% '/Data/Clarity',
  'p_d_cl_pa' = cd %c% '/Data/Clarity/PlasmaAnalysis',
  'p_d_cl_pb' = cd %c% '/Data/Clarity/PaydayBans',
  'p_d_acs_pa' = cd %c% '/Data/ACS/Analysis',
  'p_r' = cd %c% '/Results',
  'p_rt' = cd %c% '/Results/Tables',
  'p_rt_sg' = cd %c% '/Results/Tables/Safegraph',
  'p_rt_cl' = cd %c% '/Results/Tables/Clarity',
  'p_rf' = cd %c% '/Results/Figures',
  'p_rf_sg' = cd %c% '/Results/Figures/Safegraph',
  'p_rf_cl' = cd %c% '/Results/Figures/Clarity',
  'p_rm' = cd %c% '/Results/Models',
  'p_rm_sg' = cd %c% '/Results/Models/Safegraph',
  'p_rm_cl' = cd %c% '/Results/Models/Clarity'
)




# ===============================================================================================================================
# Some utility functions for me to use.
# ===============================================================================================================================


my_etable <- function (est, file = NULL, replace = TRUE){
  require(fixest)
  tbl <- hush(etable(est,tex=TRUE))
  if (!missing(file)) {
    cat(tbl, file = file, append = !replace)
  } else {
    return(tbl)
  }
}

my_etable_df <- function(est,signif.code=c("***"=0.01,"**"=0.05,"*"=0.10)){
  tbl_all <- etable(est, signif.code=signif.code)
  tbl_all <- cbind(names = rownames(tbl_all), tbl_all)
  return(tbl_all)
}




loadRData <- function(fileName){
  #loads an RData file, and returns it
  load(fileName)
  get(ls()[ls() != "fileName"])
}


hush <- function(code){
  sink("NUL") # use /dev/null in UNIX
  tmp = code
  sink()
  return(tmp)
}


winsorize_x = function(x, cut_l = 0, cut_u = 0){
  cut_point_top <- quantile(x, 1 - cut_u, na.rm = T)
  cut_point_bottom <- quantile(x, cut_l, na.rm = T)
  i = which(x >= cut_point_top)
  x[i] = cut_point_top
  j = which(x <= cut_point_bottom)
  x[j] = cut_point_bottom
  return(x)
}

interactions = function(df, df_desc, inter_indep, inter_fe, inter_type){
  #Get the value and variable
  inter_val <- df_desc$value
  inter_var <- distinct(df_desc, variable)[[1,'variable']]
  inter_var_labels <- as.list(setNames(df_desc$description, df_desc$variable))
  inter_val_labels <- as.list(setNames(df_desc$label_val, df_desc$value))
  #Create the fixed effect string
  inter_fe_ft <- gsub('^[ ]*','',strsplit(inter_fe,"\\+")[[1]])
  inter_fe_f <- ''
  for (v in inter_fe_ft) {inter_fe_f <- inter_fe_f %c% ' + ' %c% inter_var %c% '^' %c% v}
  inter_fe_f <- gsub('^[ ]*\\+[ ]*','',inter_fe_f)
  inter_indep_f <- inter_indep
  inter_tvars <- c()
  v <- 1
  inter_labels <- list()
  for (v in inter_val){
    if (inter_type=='specific') { nvar <- inter_var %c% '_' %c% v }
    if (inter_type=='generic') { nvar <- 'interq_' %c% v }
    
    df <- df %>% mutate(!!nvar  := case_when(get(inter_var)==v ~ 1, get(inter_var)!=v ~ 0,
                                             as.character(get(inter_var))==inter_val_labels[v] ~ 1,
                                             as.character(get(inter_var))!=inter_val_labels[v] ~ 0))
    #Label the numerical interaction var
    if (inter_type=='specific') { inter_labels[[nvar]] <- inter_var_labels[inter_var] %c% ' Q' %c% v }
    if (inter_type=='generic') { inter_labels[[nvar]] <- 'Inter. Q' %c% v }
    #Create the string with the interacted indep var
    inter_indep_f <- inter_indep_f %c% ' + ' %c% nvar %c% ':' %c% inter_indep 
    inter_tvars <- c(inter_tvars,nvar)
  }
  var_label(df) <- inter_labels
  return(list('df'=df,'inter_fe'=inter_fe_f,'inter_indep_f'=inter_indep_f,'inter_tvars'=inter_tvars,'inter_labels'=inter_labels))
}



interactions2 = function(df, inter_var, inter_desc, inter_indep, inter_fe, inter_type){
  #Get the value and variable
  inter_val <- inter_desc[['base']]
  inter_var_labels <- list()
  inter_var_labels[[inter_var]] <- inter_desc[['var_label']]
  inter_val_labels <- as.list(inter_desc[['val_label']])
  inter_vals <- unlist(inter_val_labels, use.names=FALSE)
  #Create the fixed effect string
  inter_fe_ft <- gsub('^[ ]*','',strsplit(inter_fe,"\\+")[[1]])
  inter_fe_f <- ''
  for (v in inter_fe_ft) {inter_fe_f <- inter_fe_f %c% ' + ' %c% inter_var %c% '^' %c% v}
  inter_fe_f <- gsub('^[ ]*\\+[ ]*','',inter_fe_f)
  inter_indep_f <- inter_indep
  inter_tvars <- c()
  v <- 1
  inter_labels <- list()
  for (v in inter_vals){
    if (v==inter_val){ next }
    if (inter_type=='specific') { nvar <- inter_var %c% '_' %c% v }
    if (inter_type=='generic') { nvar <- 'interq_' %c% v }
    
    # df <- df %>% mutate(!!nvar  := case_when(get(inter_var)==v ~ 1, get(inter_var)!=v ~ 0,
    #                                          as.character(get(inter_var))==inter_val_labels[v] ~ 1,
    #                                          as.character(get(inter_var))!=inter_val_labels[v] ~ 0))
    df <- df %>% mutate(!!nvar  := case_when(get(inter_var)==v ~ 1, get(inter_var)!=v ~ 0))
    
    #Label the numerical interaction var
    if (inter_type=='specific') { inter_labels[[nvar]] <- inter_var_labels[[inter_var]] %c% ' Q' %c% v }
    if (inter_type=='generic') { inter_labels[[nvar]] <- 'Inter. Q' %c% v }
    #Create the string with the interacted indep var
    inter_indep_f <- inter_indep_f %c% ' + ' %c% nvar %c% ':' %c% inter_indep
    inter_tvars <- c(inter_tvars,nvar)
  }
  var_label(df) <- inter_labels
  return(list('df'=df,'inter_fe'=inter_fe_f,'inter_indep_f'=inter_indep_f,'inter_tvars'=inter_tvars,'inter_labels'=inter_labels))
}


my_feols <- function(df,labels,form,filem=NULL,lean=TRUE) {
  interaction_types = list('is'='specific','ig'='generic')
  # Create the formulas
  form_ife <- form[['i']] %c% ' | ' %c% form[['fe']]
  formula_est <- as.formula(form[['y']] %c% ' ~ ' %c% form_ife)
  if (not('weight' %in% names(form))){
    form['weight'] = 'na'
  }
  #Estimate the formula.
  tic <- Sys.time()
  if (form[['spm']] == 'na'){
    if (form[['weight']] == 'na'){
      est <- tryCatch({feols(formula_est, cluster= form[['cluster']], df, lean=lean)}, error = function(e){'error'})
    } else {
      est <- tryCatch({feols(formula_est, weight= as.formula('~' %c% form[['weight']]), cluster= form[['cluster']], df, lean=lean)}, error = function(e){'error'})
    }
  } else {
    if (form[['weight']] == 'na'){
      if (form[['spm']]=='s'){ 
        df <- df %>% mutate(temp_inter = get(form[['s']][['var']]))
        val_labels(df) <- list(temp_inter = form[['s']][['val_label']])
        df <- df %>% mutate(temp_inter = to_factor(temp_inter))
        setFixest_dict(unlist(c(labels,list(temp_inter = form[['s']][['var_label']]))))
        est <- tryCatch({feols(formula_est, cluster= form[['cluster']], df, lean=lean, split = ~temp_inter)}, error = function(e){'error'})
        # est <- feols(formula_est, cluster= form[['cluster']], df, lean=lean, split = ~temp_inter)
      }
      if (form[['spm']] %in% names(interaction_types)){
        if (!('base' %in% names(form[['s']]))){next} #skip if the base is not specified
        inter_type <- interaction_types[[form[['spm']]]]
        ret <- interactions2(df=df, inter_var=form[['s']][['var']], inter_desc=form[['s']], inter_indep=form[['i']], inter_fe=form[['fe']], inter_type=inter_type)
        df <- ret$df
        formula_est <- as.formula(form[['y']] %c% ' ~ ' %c% ret$inter_indep_f %c%  '|' %c% ret$inter_fe)
        setFixest_dict(unlist(c(labels, ret$inter_labels)))
        est <- tryCatch({feols(formula_est, cluster= form[['cluster']], df, lean=lean)}, error = function(e){'error'})
      }
    } else {
      if (form[['spm']]=='s'){ 
        names(df)
        df <- df %>% mutate(temp_inter = get(form[['s']][['var']]))
        val_labels(df) <- list(temp_inter = form[['s']][['val_label']])
        df <- df %>% mutate(temp_inter = to_factor(temp_inter))
        setFixest_dict(unlist(c(labels,list(temp_inter = form[['s']][['var_label']]))))
        est <- tryCatch({feols(formula_est, weight= as.formula('~' %c% form[['weight']]), cluster= form[['cluster']], df, lean=lean, split = ~temp_inter)}, error = function(e){'error'})
        # est <- feols(formula_est, weight= as.formula('~' %c% form[['weight']]), cluster= form[['cluster']], df, lean=lean, split = ~temp_inter)
      }
      if (form[['spm']] %in% names(interaction_types)){
        if (!('base' %in% names(form[['s']]))){next} #skip if the base is not specified
        inter_type <- interaction_types[[form[['spm']]]]
        ret <- interactions2(df=df, inter_var=form[['s']][['var']], inter_desc=form[['s']], inter_indep=form[['i']], inter_fe=form[['fe']], inter_type=inter_type)
        df <- ret$df
        formula_est <- as.formula(form[['y']] %c% ' ~ ' %c% ret$inter_indep_f %c%  '|' %c% ret$inter_fe)
        setFixest_dict(unlist(c(labels, ret$inter_labels)))
        est <- tryCatch({feols(formula_est, weight= as.formula('~' %c% form[['weight']]), cluster= form[['cluster']], df, lean=lean)}, error = function(e){'error'})
      }
    }
  }
  if (typeof(est)==typeof('error')){
    return(list('rc'='error'))
  }
  # Create and add some summary information
  toc1 <- Sys.time()
  if (!is.null(filem)){  qsave(est,file=filem)  }
  toc2 <- Sys.time()
  mem <- as.numeric(object_size(est))/1024^2   #Puts it in MB
  est_prop = list('model.formula'=deparse1(formula_est, collapse = ""),'estimation.time' = as.numeric(toc1 - tic,units='secs'), 'save.time' = as.numeric(toc2 - toc1,units='secs'), 'file.size'=mem)
  return(list('rc'='success','est'=est,'est_prop'=est_prop))
}



my_iplot = function(object,labels,file='',time='yes',ref=-1,dep_vars=c(),split='na',sample=c(),rescale=c(),subs=c(),xlab='',ylab='',dpi=120,width=16,height=9,fsize=30,lwidth=1,
                    cscale=RColorBrewer::brewer.pal(7, "Set2"),brew_cscale='jco',cshape=c(16,17,18,19,20),...) {
  
  # Palletes listed at:
  # https://www.nceas.ucsb.edu/sites/default/files/2020-04/colorPaletteCheatsheet.pdf
  # https://www.datanovia.com/en/blog/the-a-z-of-rcolorbrewer-palette/
  # display.brewer.all(colorblindFriendly = TRUE)
  # FOR DPI I suggest 150, 120, 90, of 60
  scale=list('16x9'=list(width=16,height=9),  #Suggested width and height pairs
             '3x2'=list(width=15,height=10),
             '1x1'=list(width=10,height=10))
  # list2env(scale[[formats]], .GlobalEnv) #explode default or chosen specification
  list2env(list(...), .GlobalEnv)  #override any of the designated paramenters
  
  if (!(brew_cscale %in% c('','jco','viridis'))) {cscale <- RColorBrewer::brewer.pal(7, brew_cscale)}
  if ((class(object) == 'fixest_multi')|(class(object) == 'fixest')){
    if (split=='na') {
      estp <- iplot_data(object,type='i') %>% mutate(
        labels = recode(dep_var %>% str_match('lhs: (.*?);') %>% as_tibble() %>% pull(2),!!!labels),
        groups = recode(dep_var %>% str_match('lhs: (.*?);') %>% as_tibble() %>% pull(2),!!!labels)
      )
    }
    if (split=='ig') {
      estp <- iplot_data(object,type='c') %>% mutate(
        labels = recode(dep_var,!!!labels),
        groups = recode(str_extract(estimate_names_raw,'(?<=:)[a-zA-Z_0-9]*$'),!!!labels)
      )
    }
    if (split=='s') {
      estp <- iplot_data(object,type='c') %>% mutate(
        labels = recode(dep_var,!!!labels),
        groups = id
      )
      for (i in names(labels)){
        if (length(unique(estp$sample))>1) {
          temp <- labels[i][[1]] %c% ' '   #for some reason the list element is not evaluated properly within the gsub inside of mutate (it is evaluated in a simple gsub)
          estp <- estp %>% mutate(groups = gsub(i %c% ' ',temp,groups))
        } else {
          estp <- estp %>% mutate(groups = recode(groups,!!!labels))
        }
      }
    }
    if (length(rescale>0)){
      dvs <- unique(estp$dep_var)
      rescale <- sapply(dvs,function(x, rescale) {if (x %in% names(rescale)) {rescale[[x]]} else {1}}, rescale=rescale)
      estp <- estp %>% mutate(scale = recode(dep_var,!!!rescale),
                              estimate = estimate / scale,
                              ci_low =  ci_low / scale,
                              ci_high =  ci_high / scale)
    }
    if ((time=='yes')&(split!='na')) {
      estpt <- estp %>% filter(x==0) %>% mutate(x=-1,ci_low=0,ci_high=0,estimate=0,
                                                estimate_names_raw=gsub("::0","::-1",estimate_names_raw),
                                                estimate_names=gsub("::0","::-1",estimate_names))
      estp <- rbind(estp,estpt)
      
    }
    if (length(dep_vars)>0) {
      estp <- estp %>% mutate(test = grepl(paste(dep_vars, collapse = "|"),dep_var)) %>% filter(test)
    }
    #Allow for some custome substitutions in the groups variables
    if (length(subs)>0) {
      for (i in names(subs)){
        temp <- subs[i][[1]]   #for some reason the list element is not evaluated properly within the gsub inside of mutate (it is evaluated in a simple gsub)
        estp <- estp %>% mutate(groups = gsub(i,temp,groups))
      }
    }
  }  else if(class(object)=='data.frame') {
  }  else {
    return('error: unhandled object type')
  }
  #Get the number of groups
  ng <- n_distinct(estp$groups)
  # for info about shape parameter see http://sape.inf.usi.ch/quick-reference/ggplot2/shape
  gg <- ggplot(data=estp, aes(x, estimate, ymin = ci_low, ymax = ci_high, color=groups))  + theme_minimal() +
    theme(legend.title=element_blank()) +
    geom_hline(yintercept = 0, color = "black", size=1) +
    geom_vline(xintercept = ref, linetype="dotted", color = "black", size=1.5*lwidth) +
    # geom_pointrange(data=estp, size=2, color='black') +
    geom_pointrange(mapping=aes(x, estimate, ymin = ci_low, ymax = ci_high, color=groups, shape=groups),
                    size=1.5*lwidth, position = position_dodge2(width = 0.5, padding = 0.5)) +
    theme(
      # axis.title.x = element_text(face="bold", size=1.5*fsize, color='black'),
      # axis.title.y = element_text(face="bold", size=1.5*fsize, color='black'),
      axis.text.x = element_text(face="plain", size=fsize, color='black'),
      axis.text.y = element_text(face="plain", size=fsize, color='black'),
      legend.text = element_text(face="plain", size=fsize, color='black')) +
    # coord_cartesian(ylim = c(-0.04,0.04)) +
    labs(x=xlab,y=ylab) + theme(legend.position = "bottom")
  if (!(brew_cscale %in% c('jco','viridis'))) {gg <- gg + scale_colour_manual(values = cscale) + scale_shape_manual(values = cshape[1:ng])}
  if (brew_cscale == 'jco') {gg <- gg + scale_color_jco()  + scale_shape_manual(values = cshape[1:ng]) }
  if (brew_cscale == 'viridis') {gg <- gg + scale_color_viridis_d(option='D')  + scale_shape_manual(values = cshape[1:ng])}
  gg
  if (file!=''){ 
    ggsave(file,plot=gg,dpi=dpi,width=width,height=height) 
  }
  return(gg)
}




iplot_data = function(object,ci_level = 0.95,type='i') {
  dict <- getFixest_dict()
  if (type=='i') {
    p <- iplot(object, only.params = TRUE, ci_level = ci_level, dict = dict)
  } else if (type=='c') {
    p <- coefplot(object, only.params = TRUE, ci_level = ci_level, dict = dict)
    
  } else {
    return('ERROR')
  }
  d <- p$prms
  d <- d %>% mutate(x=as.numeric(str_extract(estimate_names_raw,'(?<=::)[-0-9]*(?:)')))
  
  if (class(object) == "fixest_multi") {
    tryCatch({object <- object[lhs=1:.N,sample=1:.N]},error= function(e){error <- TRUE})
    nout <- length(object)
    ids = list()
    for (i in 1:max(d$id)){
      ids[[i]]=names(object)[[1+((i-1)%%nout)]]
    }
    tryCatch({object <- object[sample=1:.N,lhs=1:.N]},error= function(e){error <- TRUE})
    nsamp <- length(object)
    samps = list()
    for (i in 1:max(d$id)){
      samps[[i]]=names(object)[[1+floor(i*length(object)/max(d$id)-0.00001)]]
    }
    d <- d %>% mutate(sample = recode(id,!!!samps),
                      dep_var = recode(id,!!!ids),
                      id = case_when((nout==1)&(nsamp>1) ~ sample,
                                     (nout>1)&(nsamp==1) ~ dep_var,
                                     (nout>1)&(nsamp>1) ~ paste(dep_var,' x ',sample)))
    
    # d$dep_var <- tryCatch({unique(as.character(lapply(object, function(m) paste(m$call$fml[[2]]))))},
    #                       error = function(e){ lapply(as.list(object), function(m){m[['fml']][[2]]}) })
  } else if (class(p$labels) == "integer") {
    p$labels <- as.numeric(p$labels)
  } else if (class(object) == 'fixest') {
    d$dep_var <- paste(object$fml[[2]])
  } else {
    d$dep_var <- paste(object$call$fml[[2]])
  }
  d <- d %>% rowwise() %>% mutate(dep_var = paste(dep_var, collapse=' ')) %>% ungroup()
  return(d)
}

